# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from elasticsearch import Elasticsearch
import re
from gov.settings import INDEX_MAPPING,ES_URL
class GovPipeline(object):
    """Scrapy item pipeline that stores every scraped item in Elasticsearch.

    The target index is created on spider start-up (using ``INDEX_MAPPING``)
    when it does not exist yet, and each item is indexed under its ``url``
    so re-scraping the same URL reuses the same document id.
    """

    # Name of the Elasticsearch index the items are written to.
    INDEX_NAME = 'index'
    # Document type passed to the client for every indexed item.
    DOC_TYPE = 'text'

    def open_spider(self, spider):
        """Connect to Elasticsearch and make sure the target index exists.

        :param spider: the spider being opened (unused).
        """
        self.es = Elasticsearch(ES_URL, timeout=1000)
        # Only create the index on first run; an existing index is left as-is.
        if not self.es.indices.exists(index=self.INDEX_NAME):
            self.es.indices.create(index=self.INDEX_NAME, body=INDEX_MAPPING)

    def close_spider(self, spider):
        """Hook called when the spider closes; currently performs no teardown.

        :param spider: the spider being closed (unused).
        """
        # Close the database connection (intentionally a no-op for now).
        pass

    def process_item(self, item, spider):
        """Index ``item`` into Elasticsearch and pass it on unchanged.

        :param item: the scraped item; must provide a ``'url'`` key, which is
            used as the document id.
        :param spider: the spider that produced the item; its ``logger`` is
            used for debug output.
        :returns: the same ``item`` object, so later pipelines can process it.
        """
        # Log through the spider's logger instead of printing to stdout so the
        # output honours the project's logging configuration.
        spider.logger.debug("Indexing item: %s", item)
        self.es.index(
            index=self.INDEX_NAME,
            doc_type=self.DOC_TYPE,
            id=item['url'],
            body=dict(item),
        )
        return item